Calculate the equal-weighted MI for the H3N2 polymerase
Read in data
seq_times <- readRDS("../data/seq_times_091122.rda")
polHA_groups <- seq_times %>%
select(ID = rowname, group = date)
polHA <- readMSA("../../H3N2_Processing/polha_aa.fasta")
Calculate MI
- Uncomment code to actually run
- Warning: for H3N2 pol-HA, takes ~1 hour with ncores
= 7
# polHA_MI <- calculateMI(polHA, weighted = TRUE, groups = polHA_groups,
# weights = "equal", ncores = 7)
#
# saveRDS(polHA_MI, "../data/h3n2polHA_equalMI.rds")
Generate Network Viz
- Use associationsubgraphs package
- Set default step to “min_max_rule”
- Use mi_network_info to get readable index names
polHA_MI <- readRDS("../data/h3n2polHA_equalMI.rds")
mi_network_info <- readRDS("../data/mi_network_info.rds")
polHA_MI_IDs <- polHA_MI %>%
left_join(mi_network_info, by = c("V1" = "index")) %>%
left_join(mi_network_info, by = c("V2" = "index")) %>%
filter(z_score > 4) %>%
select(a = "id.x", b = "id.y", strength = z_score)
# saveRDS(polHA_MI_IDs,"/Users/sarcos/Desktop/Lauring Lab/MI_Networks_App/data/polHA_MI.rds")
visualize_subgraph_structure(
association_pairs = polHA_MI_IDs,
node_info = mi_network_info %>% select(-index),
trim_subgraph_results = FALSE,
default_step = "min_max_rule"
)
## Calculating subgraph structure results...
## ...finished
## Warning in visualize_subgraph_structure(association_pairs = polHA_MI_IDs, :
## There are 2472 ids in the node_info dataframe that were not seen in association
## pairs. These are omitted.
Tally high-scoring MIp among polymerase subunits
- “High-scoring” means z-score > 4
polHA_MI_tallys <- polHA_MI_IDs %>%
mutate(grouping = paste(a,b, sep = ", ")) %>%
mutate(grouping = str_remove_all(grouping, pattern = "_[0-9]*")) %>%
count(grouping) %>%
arrange(n) %>%
mutate(grouping = factor(grouping, levels = unique(grouping)))
ggplot(polHA_MI_tallys, aes(x = grouping, y = n)) +
geom_col(alpha = 0.7, fill = "grey") +
theme_classic() +
labs(title = "wMI pairs within and between polymerase proteins and HA",
x = "Group", y = "Count") +
theme(text = element_text(size = 12),
legend.position = "none")

ggsave("../Figures/mipHA_pairs_all.pdf",
width = 6.8, height = 3)
polHA_MI_tallys_summary <- polHA_MI_IDs %>%
mutate(group1 = paste(a,b, sep = ", ")) %>%
mutate(group1 = str_remove_all(group1, pattern = "_[0-9]*")) %>%
mutate(grouping = case_when(
group1 %in% c("PB1, HA", "PB2, HA", "PA, HA") ~ "Pol-HA",
group1 == "HA, HA" ~ "HA-HA",
TRUE ~ "Pol-Pol"
)) %>%
count(grouping) %>%
arrange(n) %>%
mutate(grouping = factor(grouping, levels = unique(grouping)))
ggplot(polHA_MI_tallys_summary, aes(x = grouping, y = n)) +
geom_col(alpha = 0.7, fill = "grey") +
theme_classic() +
labs(title = "wMI pairs within and between polymerase proteins and HA",
x = "Group", y = "Count") +
theme(text = element_text(size = 12),
legend.position = "none")

# ggsave("../Figures/mipHA_pairs_summary.pdf",
# width = 5, height = 3)
Plot amino acid frequencies for top subgraphs with HA
#Subgraph 5
plot_example_freqs(c(194, 338, 569, 2243))

# ggsave("../Figures/freqHA_subgraph5.pdf",
# width = 4, height = 3)
#Subgraph 9
plot_example_freqs(c(697, 1828, 1859, 2089, 2305))

# ggsave("../Figures/freqHA_subgraph9.pdf",
# width = 4, height = 3.5)
Plot amino acid frequencies for top subgraphs without HA
#Subgraph 4
plot_example_freqs(c(1378,1468))

# ggsave("../Figures/freqHA_subgraph4.pdf",
# width = 4, height = 2)
#Subgraph 8
plot_example_freqs(c(811, 1335))

# ggsave("../Figures/freqHA_subgraph8.pdf",
# width = 4, height = 2)
#Subgraph 10
plot_example_freqs(c(107, 1228, 1866))

# ggsave("../Figures/freqHA_subgraph10.pdf",
# width = 4, height = 2.5)
Plot distribution of top MIp among groups
polHA_MI_groups <- polHA_MI_IDs %>%
mutate(grouping = paste(a,b, sep = ", ")) %>%
mutate(grouping = str_remove_all(grouping, pattern = "_[0-9]*"))
ggplot(polHA_MI_groups, aes(x = factor(grouping, levels = polHA_MI_tallys$grouping), y = log10(strength))) +
geom_violin() +
stat_summary(fun.data = "mean_cl_boot", geom = "pointrange",
colour = "red") +
theme_classic() +
theme(text = element_text(size = 12),
legend.position = "none")

polHA_MI_summarygroups <- polHA_MI_IDs %>%
mutate(group1 = paste(a,b, sep = ", ")) %>%
mutate(group1 = str_remove_all(group1, pattern = "_[0-9]*")) %>%
mutate(grouping = case_when(
group1 %in% c("PB1, HA", "PB2, HA", "PA, HA") ~ "Pol-HA",
group1 == "HA, HA" ~ "HA-HA",
TRUE ~ "Pol-Pol"
))
ggplot(polHA_MI_summarygroups, aes(x = factor(grouping, levels = polHA_MI_tallys_summary$grouping), y = log10(strength))) +
geom_violin() +
stat_summary(fun.data = "mean_cl_boot", geom = "pointrange",
colour = "red") +
theme_classic() +
theme(text = element_text(size = 12),
legend.position = "none")
